home *** CD-ROM | disk | FTP | other *** search
#!/usr/local/bin/perl
#
# Acknowledgements
#
# Thanks to Guy Brooker (guy@jw.estec.esa.nl) for his AA interface,
# which was the starting point for this program.
#
# Paul Clark
# paul@cs.arizona.edu
#
# Modifications
#
# 2/22/94  Version 1.0, shell script version       Paul Clark
# 4/21/94  Version 1.1, multiple archives support  Paul Clark
# 4/22/94  Version 1.2, perl script                Paul Clark
# 4/26/94  rewritten from aglimpse script

# **** **** **** **** CONFIGURABLE VARIABLES **** **** **** ****

# Root of the HTTP server installation.  It is not referenced by the code
# visible in this script; kept because it is a site configuration value.
$HTTPD_HOME = "/usr1/paul/httpd";

# Root of the news archive.  The Glimpse index of a newsgroup is looked up
# in "$HTTPD_NEWSHOME/indices/<newsgroup>".
$HTTPD_NEWSHOME = "/usr1/paul/news";

# Full path of the glimpse executable.
$GLIMPSE_LOC = "/usr/paul/bin/glimpse";

# **** **** **** **** NO CONFIGURATION NEEDED BELOW **** **** **** ****

# URL prefix used for the article links in the result page; the article
# path (and "?<line>") is appended to it.
$FSSERV = "/cgi-bin/article";

# To support an ISINDEX type search, set query string if given
# an argument on the command line.  The prefix is later prepended to
# QUERY_STRING, so the bare ISINDEX keywords become the value of "query".
if (@ARGV) {
    $prefix = "whole=on&case=on&query=";
}

if ( $ENV{'PATH_INFO'} ) {
    # old-fashioned way to give newsgroup: /cgi-bin/<script>/<newsgroup>
    $newsgroup = substr($ENV{'PATH_INFO'}, 1);
    $newsgroup =~ s|"||g;
    # Placed first so that a "group=" in QUERY_STRING still overrides it.
    $prefix = "group=" . $newsgroup . "&" . $prefix;
}

# Fetch the query string (may be absent when the script is called bare).
$query = $ENV{'QUERY_STRING'};
$query = "" unless defined $query;
$query =~ s|"||g;

# Strip the variables out from the query string,
# and assign them into variables, prefixed by 'QS_'
@qvars = split(/\&/, $prefix . $query);
&set_query_vars($prefix . $query);

# Store every "name=value" pair of an '&'-separated query string in the
# package variable $QS_<name> (e.g. "group=x" sets $QS_group to "x").
#
#   $_[0]  the raw query string; values are stored exactly as given
#          (no URL decoding is done here).
#
# Pairs whose name is not a plain identifier are ignored.  A pair without
# '=' yields an empty value.  When a name occurs more than once the last
# occurrence wins.
#
# The values are assigned through a symbolic reference rather than by
# eval'ing generated Perl source: the request data is untrusted, and a value
# such as "@{[ ... ]}" would otherwise be executed as code.
sub set_query_vars {
    my ($query_string) = @_;
    my ($pair, $name, $value);

    $query_string = "" unless defined $query_string;
    foreach $pair (split(/&/, $query_string)) {
        # Limit of 2 keeps any further '=' characters inside the value.
        ($name, $value) = split(/=/, $pair, 2);
        next unless defined($name) && $name =~ /\A[a-z_A-Z]\w*\z/;
        $value = "" unless defined $value;
        ${"main::QS_$name"} = $value;
    }
}

# Work out which newsgroup index to search.  The group name is lower-cased
# and is NOT URL-decoded.
$newsgroup = $QS_group;
$newsgroup = "" unless defined $newsgroup;
$newsgroup =~ tr/A-Z/a-z/;
$indexdir = $HTTPD_NEWSHOME . "/indices/" . $newsgroup;

# Ensure that Glimpse is available on this machine
err_noglimpse() unless -x $GLIMPSE_LOC;

# The group name comes from the request and becomes part of a directory
# name that is later chdir'ed into, so refuse anything that is not made of
# newsgroup characters or that contains ".." (path traversal).
err_noindex() if $newsgroup !~ /\A[a-z0-9+_.-]*\z/ || $newsgroup =~ /\.\./;

# Ensure that index is available
err_noindex() unless -r "$indexdir/.glimpse_index";

$ENV{'HOME'} = $indexdir;    # some versions of Glimpse need it

# URL-decode the search pattern.  Only real hexadecimal escapes are decoded;
# anything else (e.g. "%zz") is left as typed.
$QS_query = "" unless defined $QS_query;
$QS_query =~ s|\+| |g;
$QS_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
$QS_query =~ s|"||g;

# if no query has been made, provide ISINDEX type of reply
# (tested by length so that a pattern of "0" is still a valid query)
err_noquery() unless length($QS_query);

# Translate the form fields into glimpse command line options.
$OPT_errors = "-$QS_errors" if $QS_errors =~ /\A[0-8]\z/;
$OPT_errors = "-B -y"       if $QS_errors =~ /\ABest\+match\z/;
$OPT_case   = "-i"          if $QS_case   =~ /\Aon\z/;
# NOTE(review): "-w" is added when "whole" is NOT "on"; this mirrors the
# original script -- confirm against the search form's wording.
$OPT_whole  = "-w"          unless $QS_whole =~ /\Aon\z/;

# Output limits: the first run of digits in the field, or the default.
if ($QS_maxlines =~ /(\d+)/) {
    $maxlines = $1;
} else {
    $maxlines = 20;
}
if ($QS_maxfiles =~ /(\d+)/) {
    $maxfiles = $1;
} else {
    $maxfiles = 100;
}

# Run glimpse on the newsgroup index and turn its output into an HTML page
# with one list of matching lines per article.

# Unbuffer STDOUT so that everything printed so far has left this process
# before the glimpse child is forked.
$| = 1;

# HTML-safe copies of the request values that are echoed into the page.
my $html_query = defined($QS_query)  ? $QS_query  : "";
my $html_group = defined($newsgroup) ? $newsgroup : "";
foreach ($html_query, $html_group) {
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    s/"/&quot;/g;
}

print "Content-type: text/html\n\n";
print "<HEAD><TITLE>Search for \"$html_query\" in \"$html_group\"\n";
print "</TITLE></HEAD><BODY>\n";
print "<H1>Glimpse search for \"$html_query\"<BR>" .
      "in newsgroup \"$html_group\"</H1><HR>\n";

# The temporary-file cleanup below must only ever happen inside the index
# directory, so stop if it cannot be entered.
unless (chdir $indexdir) {
    print "<P>The index directory is not accessible.</P></BODY>\n";
    exit 1;
}
unlink <.glimpse_tmp*>;

# Option words, split on whitespace the way the shell used to split them
# ($OPT_errors may hold two options, unset options vanish).
my @glimpse_args =
    split(' ', "-z -y -n $OPT_case $OPT_whole $OPT_errors -H .$OPT_filter");

# Fork a child whose STDOUT is connected to GOUT and exec glimpse there with
# an argument LIST.  No shell is involved, so the search pattern is handed to
# glimpse as one literal argument whatever characters it contains.
# NOTE(review): a pattern starting with "-" may still be read as an option by
# glimpse itself -- confirm against the glimpse manual.
$gpid = open(GOUT, "-|");
unless (defined $gpid) {
    print "<P>Unable to start the search program.</P></BODY>\n";
    exit 1;
}
if ($gpid == 0) {
    # Child: merge stderr into the pipe (the old "2>&1"), then become glimpse.
    open(STDERR, ">&STDOUT");
    exec $GLIMPSE_LOC, @glimpse_args, $QS_query;
    exit 127;    # only reached when exec failed
}

$prevfile = "";
$lcount = 0;    # matching lines in total
$fcount = 0;    # articles with at least one match
LINE: while (<GOUT>) {
    # Make the glimpse output safe to embed in HTML.
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    # Expected line format: "<file>: <line number>:<text>"; skip the rest.
    ( /^([^ :]*):\s*([0-9][0-9]*):(.*)/ ) || next;
    $file = $1;
    $line = $2;
    $string = $3;
    # Keep only "/<group dir>/<article>" after the "groups" directory.
    $file =~ s|.*groups(/[^/]*/[^/]*)$|$1|;
    if ($file ne $prevfile) {
        $fcount++;
        $linecount = -1;
        if ($fcount > $maxfiles) {
            # Close the list of the previous article before the notice.
            print "</UL>\n" if $prevfile ne "";
            $prevfile = "";
            print "<H3>Limit of $maxfiles " .
                  "articles exceeded...</H3>\n";
            $file = "";
            $fcount = "at least $fcount";
            $lcount = "at least $lcount";
            last LINE;
        }
        print "</UL>" if $prevfile ne "";
        $prevfile = $file;
        print "<H3>Article <A HREF=\"", $FSSERV, $file,
              "\">", $file, "</A></H3><UL>\n";
    }
    $lcount++;
    $linecount++;
    if ($linecount >= $maxlines) {
        # Report the per-article limit once, then keep counting silently.
        print "<LI>Limit of $maxlines matched " .
              "lines per file exceeded...\n" if $linecount == $maxlines;
        next LINE;
    }
    print "<LI><A HREF=\"", $FSSERV, $file, "?", $line, "#mfs\">\n";
    print "line ", $line, ":", $string, "</A>\n";
}
# $prevfile is non-empty exactly when an article list is still open.
print "</UL>\n" if $prevfile ne "";
print "<HR>";
print "<H3>Summary for query <code>\"", $html_query, "\"</code>:</H3>\n";
print "found ", $lcount, " matches in ", $fcount, " articles\n";
print "</BODY>\n";
close(GOUT);
# Remove the temporary file named after the glimpse process id.
unlink "/tmp/.glimpse_tmp.$gpid";

# Terminate the script with exit status 1.  Called by the err_* routines
# once their error page has been printed.
sub diag_exit {
    # exit on error
    exit 1;
}
# The script was called without a query.
# Provide an ISINDEX type response for browsers without form support,
# then terminate through diag_exit.
# Reads the global $newsgroup, which originates from the request and is
# therefore HTML-escaped before being echoed into the page.
sub err_noquery {
    my $group = defined($newsgroup) ? $newsgroup : "";
    $group =~ s/&/&amp;/g;
    $group =~ s/</&lt;/g;
    $group =~ s/>/&gt;/g;
    $group =~ s/"/&quot;/g;

    print "Content-type: text/html\n\n";
    print "<HEAD><TITLE>Search newsgroup \"$group\"</TITLE></HEAD>\n";
    print "<BODY><H1> Search newsgroup \"$group\" </H1>\n";
    print "Welcome to the gateway to \"$group\".\n";
    print <<'EOM';
Type a pattern to search in your browser's
search dialog. Query is case-insensitive by default<P>

<ISINDEX>

<H2>IMPORTANT !</H2>
<QUOTE>
<UL>
<LI>This is an experimental service. Therefore, its functionality
may change or discontinue at any moment without prior notice.
<LI>Please report any malfunctions of this gateway to the
address below.
<LI>All access operations on the server are logged.
If you discover any security leaks, don't use them - report them to
the address below.
</UL>
</QUOTE>

<HR>
<ADDRESS>
Your name<BR>
login@your.host.here<BR>
</ADDRESS>

</BODY>

EOM
    diag_exit();
}

#
# Glimpse was not found
# Report a useful message: print a complete CGI response (header plus HTML
# page) telling the administrator to fix the configured path, then terminate
# through diag_exit.
#
sub err_noglimpse {
    # The heredoc starts with the CGI header; the empty line after it
    # separates the header from the document body.
    print <<'EOM';
Content-type: text/html

<HEAD>
<TITLE>Glimpse not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse not found</H1>

This gateway relies on <CODE>Glimpse</CODE> search tool.
If it is installed, please set the correct path in the script file.
Otherwise obtain the latest version from
<A HREF="file://ftp.cs.arizona.edu/glimpse">ftp.cs.arizona.edu</A>
</BODY>
EOM
    diag_exit();
}

# No readable Glimpse index exists for the requested newsgroup.
# Print an explanatory HTML page, then terminate through diag_exit.
# Reads the global $newsgroup, which originates from the request and is
# therefore HTML-escaped before being echoed into the page.
sub err_noindex {
    my $group = defined($newsgroup) ? $newsgroup : "";
    $group =~ s/&/&amp;/g;
    $group =~ s/</&lt;/g;
    $group =~ s/>/&gt;/g;
    $group =~ s/"/&quot;/g;

    print "Content-type: text/html\n\n";
    print "<HEAD><TITLE>Newsgroup $group not found</TITLE>\n";
    print "</HEAD> <BODY>\n";
    print "<H1>Newsgroup $group not found</H1>\n";
    print <<'EOM';
Please remember: this is NOT a news server. It has only a few
newsgroups. Please use it ONLY for the search of a particular article,
NOT for reading the news.
</BODY>
EOM
    diag_exit();
}
